/* ========================================================================== *
 *   Copyright (c) 2006, Pier Paolo Fumagalli <mailto:pier@betaversion.org>   *
 *                            All rights reserved.                            *
 * ========================================================================== *
 *                                                                            * 
 * Redistribution and use in source and binary forms, with or without modifi- *
 * cation, are permitted provided that the following conditions are met:      *
 *                                                                            * 
 *  - Redistributions of source code must retain the  above copyright notice, *
 *    this list of conditions and the following disclaimer.                   *
 *                                                                            * 
 *  - Redistributions  in binary  form  must  reproduce the  above  copyright *
 *    notice,  this list of conditions  and the following  disclaimer  in the *
 *    documentation and/or other materials provided with the distribution.    *
 *                                                                            * 
 *  - Neither the name of Pier Fumagalli, nor the names of other contributors *
 *    may be used to endorse  or promote products derived  from this software *
 *    without specific prior written permission.                              *
 *                                                                            * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" *
 * AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING, BUT NOT LIMITED TO, THE *
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE *
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER  OR CONTRIBUTORS BE *
 * LIABLE  FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR *
 * CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT  NOT LIMITED  TO,  PROCUREMENT OF *
 * SUBSTITUTE GOODS OR SERVICES;  LOSS OF USE, DATA, OR PROFITS;  OR BUSINESS *
 * INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF LIABILITY,  WHETHER IN *
 * CONTRACT,  STRICT LIABILITY,  OR TORT  (INCLUDING NEGLIGENCE OR OTHERWISE) *
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
 * POSSIBILITY OF SUCH DAMAGE.                                                *
 * ========================================================================== */
package it.could.confluence.autoexport.engine;

import it.could.confluence.autoexport.ConfigurationManager;
import it.could.confluence.autoexport.ExportManager;
import it.could.confluence.autoexport.LocationManager;
import it.could.util.encoding.EncodingTools;
import it.could.util.location.Location;
import it.could.util.location.Parameters;
import it.could.util.location.Path;
import it.could.util.location.PathElement;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.log4j.Logger;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.HTMLAugmentations;
import org.cyberneko.html.filters.Purifier;
import org.cyberneko.html.filters.Writer;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.atlassian.confluence.pages.AbstractPage;
import com.atlassian.confluence.pages.Attachment;
import com.atlassian.confluence.pages.PageManager;
import com.atlassian.confluence.servlet.simpledisplay.BlogPathConverter;
import com.atlassian.confluence.servlet.simpledisplay.MailPathConverter;
import com.atlassian.confluence.servlet.simpledisplay.PagePathConverter;
import com.atlassian.confluence.servlet.simpledisplay.PathConverter;
import com.atlassian.confluence.servlet.simpledisplay.SpacePathConverter;
import com.atlassian.confluence.servlet.simpledisplay.UserPathConverter;
import com.atlassian.confluence.spaces.Space;
import com.atlassian.confluence.spaces.SpaceManager;
import com.atlassian.spring.container.ContainerManager;

/**
 * <p>The {@link ExportBeautifier} is used by the {@link ExportManager} to
 * beautify the generated HTML and to fix links generated by macros directly
 * pointing to Confluence.</p>
 * 
 * <p>This class is required as most macros don't make use of the Confluence's
 * link renderer mechanism, but directly insert the web-application path inside
 * the output.</p>
 *
 * <p>The class acts as the last filter of a CyberNeko HTML parser chain: every
 * element flowing through it has its <code>href</code> and <code>src</code>
 * attributes rewritten (see {@link #filter(XMLAttributes)}) before being
 * serialized by the superclass.</p>
 */
public class ExportBeautifier extends Writer {

    private static final String E_HEAD = "HEAD";
    private static final String E_META = "META";
    private static final String A_HTTP_EQUIV = "http-equiv";
    private static final String A_HREF = "href";
    private static final String A_SRC = "src";
    private static final String A_CONTENT = "content";
    private static final String V_CONTENT_TYPE = "content-type";

    /** The converters used to translate "/display/..." paths into actions. */
    private static final PathConverter CONVERTERS[] = new PathConverter[] {
                                                      new PagePathConverter(),
                                                      new BlogPathConverter(),
                                                      new SpacePathConverter(),
                                                      new UserPathConverter(),
                                                      new MailPathConverter() };

    private static final QName META = new QName("", E_META, E_META, "");
    private static final XMLAttributes META_HTTP_EQUIV_CTYPE;
    private static final Augmentations  AUGMENTATIONS = new HTMLAugmentations();
    private static final Logger LOGGER = Logger.getLogger(ExportBeautifier.class);

    static {
        /* Initialize the path converters */
        for (int x = 0; x < CONVERTERS.length; x++) {
            ContainerManager.autowireComponent(CONVERTERS[x]);
        }

        /*
         * We don't really care what charset name we're inserting, as the
         * CyberNeko HTML writer will always replace the charset. Because
         * of this, we can do the whole thing statically.
         */
        final OutputStream temp = new ByteArrayOutputStream();
        final String charset = new OutputStreamWriter(temp).getEncoding();
        final XMLAttributesImpl atts = new XMLAttributesImpl();
        final QName equiv = new QName("", A_HTTP_EQUIV, A_HTTP_EQUIV, "");
        final QName contn   = new QName("", A_CONTENT,    A_CONTENT,    "");
        atts.addAttribute(equiv, "CDATA", "Content-Type");
        atts.addAttribute(contn, "CDATA", "text/html;charset=" + charset);
        META_HTTP_EQUIV_CTYPE = atts;
    }

    /**
     * <p>A flag indicating whether the meta tag containing the content type
     * has been seen within the document (if not, we'll insert it).</p>
     */
    private boolean hasContentType = false;

    private final String confluenceBase;
    private final Location exportUrl;
    private final Location entityUrl;
    private final Location displayUrl;
    private final Location viewPageUrl;
    private final Location viewSpaceUrl;
    private final Location thumbnailUrl;
    private final Location attachmentUrl;

    private final PageManager pageManager;
    private final SpaceManager spaceManager;
    private final LocationManager locationManager;
    private final String encoding;

    /**
     * <p>Create a new {@link ExportBeautifier} instance.</p>
     *
     * @param page the page being exported: links are resolved against its
     *             Confluence URL and relativized against its export location.
     * @param configurationManager the source of the output encoding and of
     *                             the Confluence base URL.
     * @param pageManager used to look up the pages targeted by links.
     * @param spaceManager used to look up the spaces targeted by links.
     * @param locationManager used to compute the exported location of pages
     *                        and attachments.
     */
    public ExportBeautifier(AbstractPage page,
                            ConfigurationManager configurationManager,
                            PageManager pageManager,
                            SpaceManager spaceManager,
                            LocationManager locationManager) {

        this.pageManager = pageManager;
        this.spaceManager = spaceManager;
        this.locationManager = locationManager;

        this.exportUrl = locationManager.getLocation(page);
        this.encoding = configurationManager.getEncoding();
        this.confluenceBase = configurationManager.getConfluenceUrl();

        this.entityUrl = Location.parse(this.confluenceBase + page.getUrlPath());
        this.displayUrl = Location.parse(this.confluenceBase + "/display/");
        this.viewPageUrl = Location.parse(this.confluenceBase + "/pages/viewpage.action");
        this.viewSpaceUrl = Location.parse(this.confluenceBase + "/spaces/viewspace.action");
        this.thumbnailUrl = Location.parse(this.confluenceBase + "/download/thumbnails/");
        this.attachmentUrl = Location.parse(this.confluenceBase + "/download/attachments/");
    }

    /**
     * <p>Beautify and fixup the links in the specified HTML content and write
     * it to the specified {@link File}.</p>
     * 
     * @param content the HTML content to beautify, fixup and write.
     * @param file the {@link File} where the content will be written to.
     * @throws SAXException if an exception occurred parsing the content.
     * @throws IOException if an I/O error occurred writing to the file.
     */
    public void beautify(String content, File file)
    throws SAXException, IOException {
        final OutputStream output = new FileOutputStream(file);
        try {
            /* Forget any content-type meta tag seen in a previous document */
            this.hasContentType = false;

            /* Initialize the output writer */
            final OutputStreamWriter writer = new OutputStreamWriter(output, this.encoding);
            super.fPrinter = new PrintWriter(writer);

            /*
             * Keep the encoding advertised by the superclass in sync with the
             * one actually used to write the file, so that the charset of the
             * content-type meta tag doesn't disagree with the file's bytes.
             */
            super.fEncoding = this.encoding;

            /* Create a way to read the page from the specified string */
            final Reader reader = new StringReader(content);
            final InputSource input = new InputSource(reader);
            final SAXParser parser = new SAXParser();

            /* Set all the required parser features */
            parser.setFeature("http://xml.org/sax/features/namespaces",                              false);
            parser.setFeature("http://cyberneko.org/html/features/balance-tags",                     false);
            parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections",           true);
            parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs",             true);
            parser.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs",          true);
            parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",      true);
            parser.setFeature("http://cyberneko.org/html/features/scanner/fix-mswindows-refs",       true);
            parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
            parser.setFeature("http://cyberneko.org/html/features/report-errors",                    false);

            /* Setup the CyberNeko filter chain (including ourselves writing) */
            parser.setProperty("http://cyberneko.org/html/properties/filters",
                               new XMLDocumentFilter[] { new Purifier(), this });

            /* Parse the input (this will write to the file) */
            parser.parse(input);

            /* Flush everything that was written */
            super.fPrinter.flush();
            writer.flush();
            output.flush();

        } finally {
            /* No matter what happens, always close the file */
            output.close();
        }
    }

    /**
     * <p>Handle a document type declaration and write it to the output.</p>
     *
     * <p>Nothing is written when the name is <b>null</b> or empty; empty
     * public and system identifiers are treated as missing.</p>
     */
    public void doctypeDecl(String name, String pub, String sys,
                            Augmentations augs) {
        if (name == null) return;
        if ("".equals(name)) return;
        if ("".equals(pub)) pub = null;
        if ("".equals(sys)) sys = null;
        this.fPrinter.write("<!DOCTYPE ");
        /* Use a fixed locale: the result must not depend on the platform */
        this.fPrinter.write(name.toUpperCase(java.util.Locale.ENGLISH));
        if (pub != null) {
            this.fPrinter.write(" PUBLIC \"");
            this.fPrinter.write(pub);
            if (sys != null) {
                this.fPrinter.write("\" \"");
                this.fPrinter.write(sys);
            }
            this.fPrinter.write('"');
        } else if (sys != null) {
            this.fPrinter.write(" SYSTEM \"");
            this.fPrinter.write(sys);
            this.fPrinter.write('"');
        }
        this.fPrinter.write('>');
        this.fPrinter.println();
    }

    /**
     * <p>Handle an empty element and write it to the output.</p>
     */
    public void emptyElement(QName name, XMLAttributes atts, Augmentations augs) 
    throws XNIException {
        /* Check for meta tags specifying content type and encoding */
        this.filter(name, atts);
        /* Rewrite wrong links (the ones pointing to confluence) */
        this.filter(atts);
        /* Write the tag down to the file */
        super.emptyElement(name, atts, augs);
    }

    /**
     * <p>Handle the start of an element and write it to the output.</p>
     */
    public void startElement(QName name, XMLAttributes atts, Augmentations augs) 
    throws XNIException {
        /* Check for meta tags specifying content type and encoding */
        this.filter(name, atts);
        /* Rewrite wrong links (the ones pointing to confluence) */
        this.filter(atts);
        /* Write the tag down to the file */
        super.startElement(name, atts, augs);
    }

    /**
     * <p>Handle the end of an element and write it to the output.</p>
     *
     * <p>When the element being closed is the document's head and no
     * content-type meta tag was seen so far, one is inserted right before
     * the closing tag.</p>
     */
    public void endElement(QName name, Augmentations augs) 
    throws XNIException {
        /* Check if we're closing the document's head */
        if (E_HEAD.equalsIgnoreCase(name.rawname)) {
            /* If we didn't encounter a meta for the content type, insert it. */
            if (! this.hasContentType) {
                super.emptyElement(META, META_HTTP_EQUIV_CTYPE, AUGMENTATIONS);
            }
        }
        /* Write the tag down to the file */
        super.endElement(name, augs);
    }

    /**
     * <p>Override the print entity method to intercept the
     * <code>&amp;apos;</code> entity which is not supported by Microsoft
     * Internet Explorer.</p>
     */
    protected void printEntity(String name) {
        if ("apos".equalsIgnoreCase(name)) super.fPrinter.write('\'');
        else super.printEntity(name);
    }

    /* ====================================================================== */
    /* INTERNAL METHODS PRIVATE TO THIS INSTANCE                              */
    /* ====================================================================== */

    /**
     * <p>Analyse a list of attributes and potentially rewrite links.</p>
     *
     * <p>Only <code>href</code> and <code>src</code> attributes are examined.
     * Their value is resolved against the Confluence URL of the current page,
     * translated to the exported location of the page, space home page or
     * attachment it refers to (when it can be identified), and finally
     * relativized against the export location of the current page.</p>
     *
     * <p>Any failure while translating an attribute is logged and leaves
     * that attribute untouched: a broken link must never abort the export.</p>
     */
    private void filter(XMLAttributes atts) {
        for (int x = 0; x < atts.getLength(); x ++) {
            try {
                /* Locale independent (and null safe) attribute name check */
                final String name = atts.getQName(x);
                if (! (A_HREF.equalsIgnoreCase(name) || A_SRC.equalsIgnoreCase(name))) continue;

                /* Resolve the location against the URL of this page */
                Location location = this.entityUrl.resolve(atts.getValue(x));

                /* Remember the fragment, as it might be lost in resolution */
                final String fragment = location.getFragment();

                /* Start by checking if the specified location is a "display" one */
                final Location displayRel = this.displayUrl.relativize(location, false);
                if (displayRel.isRelative()) {
                    /* The path needs to be URL decoded before converting it */
                    final String encoded = displayRel.getPath().toString();
                    final String path = EncodingTools.urlDecode(encoded);

                    /* Convert the path as Confluence would, getting the action */
                    for (int y = 0; y < CONVERTERS.length; y++) {
                        if (! CONVERTERS[y].handles(path)) continue;
                        final String conv = CONVERTERS[y].getPath(path).getPath();
                        LOGGER.debug("Found converted path " + conv);
                        location = Location.parse(this.confluenceBase + conv);
                        break;
                    }
                }

                /* If this is a "viewPage" action, then convert the location */
                if (location.isSame(this.viewPageUrl)) {
                    final Parameters parameters = location.getParameters();
                    final AbstractPage target;
                    if (parameters == null) {
                        target = null;
                    } else if (parameters.hasParameter("pageId")) {
                        /* Page identifiers are longs, an int might overflow */
                        final String string = (String) parameters.getValue("pageId");
                        final long id = Long.parseLong(string);
                        target = this.pageManager.getAbstractPage(id);
                    } else if (parameters.hasParameter("postingDay")) {
                        final String space = (String) parameters.getValue("spaceKey");
                        final String title = (String) parameters.getValue("title");
                        final String day = (String) parameters.getValue("postingDay");
                        final Date date = new SimpleDateFormat("yyyy/MM/dd").parse(day);
                        final Calendar cal = Calendar.getInstance();
                        cal.setTime(date);
                        target = this.pageManager.getBlogPost(space, title, cal);
                    } else if (parameters.hasParameter("title")) {
                        final String space = (String) parameters.getValue("spaceKey");
                        final String title = (String) parameters.getValue("title");
                        target = this.pageManager.getPage(space, title);
                    } else {
                        target = null;
                    }

                    if (target != null) {
                        location = this.locationManager.getLocation(target);
                    } else {
                        LOGGER.warn("Error resolving page " + atts.getValue(x));
                        LOGGER.warn("Resolved parameters: " + parameters);
                    }
                }

                /* If this is a "viewSpace" action, link to the space home */
                else if (location.isSame(this.viewSpaceUrl)) {
                    final Parameters parameters = location.getParameters();
                    final Space target;
                    if ((parameters != null) && parameters.hasParameter("key")) {
                        final String string = (String) parameters.getValue("key");
                        target = this.spaceManager.getSpace(string);
                    } else {
                        target = null;
                    }

                    /* A space without a home page can not be linked to */
                    if ((target != null) && (target.getHomePage() != null)) {
                        location = this.locationManager.getLocation(target.getHomePage());
                    } else {
                        LOGGER.warn("Error resolving space " + atts.getValue(x));
                        LOGGER.warn("Resolved parameters: " + parameters);
                    }
                }

                /* Rewrite for thumbnails URLs */
                else if (this.thumbnailUrl.isParent(location)) {
                    final Location temp = this.thumbnailUrl.relativize(location);
                    final Path path = temp.getPath();
                    if (temp.isRelative() && (path.size() > 1)) {
                        final PathElement pg = (PathElement) path.get(0);
                        final PathElement fn = (PathElement) path.get(1);
                        final Attachment at = this.findAttachment(pg.getName(), fn.getName());
                        if (at != null) {
                            location = this.locationManager.getLocation(at, true);
                        }
                    }
                }

                /* Rewrite for attachments URLs */
                else if (this.attachmentUrl.isParent(location)) {
                    final Location temp = this.attachmentUrl.relativize(location);
                    final Path path = temp.getPath();
                    if (temp.isRelative() && (path.size() > 1)) {
                        final PathElement pg = (PathElement) path.get(0);
                        final PathElement fn = (PathElement) path.get(1);
                        final Attachment at = this.findAttachment(pg.getName(), fn.getName());
                        if (at != null) {
                            location = this.locationManager.getLocation(at, false);
                        }
                    }
                }

                /* Restore the fragment we stripped above, if we have to */
                if (fragment != null) {
                    location = new Location(location.getScheme(),
                                            location.getAuthority(),
                                            location.getPath(),
                                            location.getParameters(),
                                            fragment);
                }

                /* Now try to relativize every resolved URL */
                final Location relative = this.exportUrl.relativize(location, true);
                atts.setValue(x, relative.toString());

            } catch (Throwable throwable) {
                /* Deliberately broad: a bad link must never stop the export */
                LOGGER.error("Error translating link attribute " + atts.getQName(x)
                             + "=\"" + atts.getValue(x) + "\"", throwable);
            }
        }
    }

    /**
     * <p>Look up the attachment with the specified file name belonging to
     * the page with the specified (textual) identifier.</p>
     *
     * @param pageId the identifier of the page, as found in the link's path.
     * @param fileName the name of the attachment, as found in the link's path.
     * @return the {@link Attachment} or <b>null</b> if the identifier is not
     *         a number, or if either the page or the attachment do not exist
     *         (in any of those cases the problem is logged).
     */
    private Attachment findAttachment(String pageId, String fileName) {
        try {
            final long id = Long.parseLong(pageId);
            final AbstractPage page = this.pageManager.getAbstractPage(id);
            if (page == null) {
                LOGGER.warn("Error resolving page with id " + id);
                return null;
            }

            final Attachment attachment = page.getAttachmentNamed(fileName);
            if (attachment == null) {
                LOGGER.warn("Error resolving attachment \"" + fileName
                            + "\" of page with id " + id);
            }
            return attachment;

        } catch (NumberFormatException exception) {
            LOGGER.error("Error parsing page id", exception);
            return null;
        }
    }

    /**
     * <p>If this tag is a meta tag specifying the Content-Type HTTP equivalence
     * for the current document, mark the tag as seen (so that we won't insert
     * it later on when we close the head tag).</p>
     *
     * <p>All comparisons are case insensitive and do not depend on the
     * platform's default locale.</p>
     */
    private void filter(QName name, XMLAttributes atts) {
        if (! E_META.equalsIgnoreCase(name.rawname)) return;

        final int length = atts.getLength();
        for (int i = 0; i < length; i++) {
            if (! A_HTTP_EQUIV.equalsIgnoreCase(atts.getQName(i))) continue;
            if (V_CONTENT_TYPE.equalsIgnoreCase(atts.getValue(i))) {
                this.hasContentType = true;
            }
        }
    }
}
